1 INTRODUCTION

Prepared this Sales Analysis for Bloom Daddy, a purveyor of rare, exotic houseplants.

When I was consulting with him, he seemed keen to understand a few facets of the data provided by Etsy on his online business. Namely, he wanted a lens into his business that I outline below:

  1. How are his sales doing, both recent and historically?
  2. What products drove the most revenue?
  3. Who is contributing to the revenue the most?
  4. Can we optimize the marketing strategy with #3 in mind?

Below you’ll find a report developed to answer some of these questions. We dive into recent numbers, look at Week-over-Week revenue, dive into an RFM (Recency, Frequency, Monetary) Analysis to segment and strategise, and then outline the top items, groups, and consumers from the data.

2 PREPARATION

In this section, you’ll find all of my work to prepare for the analysis including the R packages I utilize and the data import.

2.1 PACKAGES

knitr::opts_chunk$set(echo = TRUE)

options(ggplot2.discrete.color = "viridis")
options(ggplot2.continuous.color = "viridis")
options(ggplot2.continuous.fill = "viridis")
options(ggplot2.discrete.fill = "viridis")

options(scipen = 999)
# getwd()

#packages ----

#Workhorse
library(tidyverse)
library(lubridate)
library(rfm)

#Import & Export
library(readxl)
library(writexl)
library(readr)

#Formatting & Visualization
library(ggdist)
library(ggrepel)
library(tidyquant)
library(hrbrthemes)
library(kableExtra)
library(viridisLite)
library(scales)
library(DT) 

2.2 FUNCTIONS

detect_na <- function(data) {
    
fdat1 <- data %>%
    summarise_all(~ sum(!is.na(.)))
    
fdat2 <- data %>%
    summarise_all(~ sum(is.na(.)))
    
fdat3 <- data %>% 
    summarise_all(~ sum(is.na(.)) / length(.))
    
    fdat1_2 <-  fdat1 %>% 
        pivot_longer(everything(), names_to = "column_names", values_to = "non_NULL")
    
    fdat2_2 <- fdat2 %>% 
        pivot_longer(everything(), names_to = "column_names", values_to = "NULL")
    
    fdat3_2 <- fdat3 %>% 
        pivot_longer(everything(), names_to = "column_names", values_to = "percent_NULL")
    
    fdat1_2 %>% 
        left_join(fdat2_2, by = c("column_names")) %>% 
        left_join(fdat3_2, by = c("column_names")) %>% 
        arrange(desc(percent_NULL)) %>% 
        kbl(align = "l", format.args = list(big.mark = ",")) %>% 
        kable_styling(
           full_width = F,
           bootstrap_options = c("hover", "responsive", "striped"))

}

tablekable <- function(data) {
     data %>% 
         kbl(align = "l") %>% 
         kable_styling(
            full_width = F,
            bootstrap_options = c("hover", "responsive", "striped"))
}

tabledata <- function(data) {
    data %>% 
        datatable(filter = "bottom", style = "bootstrap5")

   # “bootstrap5”, “bulma”, “dataTables”, “foundation”, “jqueryui”, “semanticui”
}

2.3 IMPORT DATA

dat_import <- read_excel("etsy + item attributes.xlsx", 
    sheet = "dat")

dat_import %>% glimpse()
## Rows: 2,774
## Columns: 39
## $ `Sale Date`         <dttm> 2021-12-29, 2021-12-20, 2021-12-14, 2021-12-10, 2…
## $ `Item Group Old`    <chr> "philodendron", "philodendron", "anthurium", "phil…
## $ `Item Name Old`     <chr> "philodendron florida ghost", "philodendron pink p…
## $ `Item Group`        <chr> "Tarzan", "Tarzan", "Korok", "Tarzan", "Heat Pack"…
## $ `Item Name`         <chr> "Tornado", "Hyrulian Princess", "Hope Korok", "Hyr…
## $ `Item Maturity`     <chr> "s", "s", NA, "s", NA, NA, "s", "s", "s", "s", NA,…
## $ `Item Variegation`  <chr> NA, "high", NA, "high", NA, NA, "high", "high", NA…
## $ `Ship Name`         <chr> "Daniel James Hurt", "Katy Hay", "Glenn Sato", "An…
## $ `Buyer ID`          <chr> "Decker", "Deadshadow", "Dawnreaper", "Dayspear", …
## $ Buyer               <chr> "Daniel Hurt (danieljhurt)", "Freud Who (Freud2)",…
## $ Quantity            <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ Price               <dbl> 55.0, 69.0, 135.0, 110.0, 6.0, 549.0, 69.0, 69.0, …
## $ `Coupon Code`       <chr> NA, NA, NA, NA, "cyber2021;cyber2021", "cyber2021;…
## $ `Coupon Details`    <chr> NA, NA, NA, NA, "cyber2021 - % off;cyber2021 - % o…
## $ `Discount Amount`   <dbl> 0.00, 0.00, 0.00, 0.00, 55.50, 0.00, 6.90, 6.90, 3…
## $ `Shipping Discount` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `Order Shipping`    <dbl> 11.5, 11.5, 11.5, 11.5, 0.0, 0.0, 11.5, 11.5, 11.5…
## $ `Order Sales Tax`   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `Item Total`        <dbl> 55.0, 69.0, 135.0, 110.0, 6.0, 549.0, 69.0, 69.0, …
## $ Currency            <chr> "USD", "USD", "USD", "USD", "USD", "USD", "USD", "…
## $ `Transaction ID`    <dbl> 2814238693, 2803868757, 2790955322, 2781295834, 27…
## $ `Listing ID`        <dbl> 1084945150, 1022782347, 1098887289, 1138806105, 96…
## $ `Date Paid`         <dttm> 2021-12-29, 2021-12-20, 2021-12-14, 2021-12-10, 2…
## $ `Date Shipped`      <dttm> 2022-01-06, 2021-12-30, 2021-12-19, 2021-12-19, 2…
## $ `Ship Address1`     <chr> "1520 melody ln", "8507 N Capital Of Texas Hwy", "…
## $ `Ship Address2`     <chr> NA, "Apt 3013", NA, "#55", NA, NA, NA, NA, NA, NA,…
## $ `Ship City`         <chr> "Fullerton", "Austin", "Honolulu", "Cambridge", "B…
## $ `Ship State`        <chr> "CA", "TX", "HI", "MA", "WA", "WA", "HI", "CT", "F…
## $ `Ship Zipcode`      <chr> "92831", "78759-7997", "96815", "2139", "98010", "…
## $ `Ship Country`      <chr> "United States", "United States", "United States",…
## $ `Order ID`          <dbl> 2327112914, 2318304177, 2309507871, 2301819693, 22…
## $ Variations          <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ `Order Type`        <chr> "online", "online", "online", "online", "online", …
## $ `Listings Type`     <chr> "listing", "listing", "listing", "listing", "listi…
## $ `Payment Type`      <chr> "online_cc", "online_cc", "online_cc", "online_cc"…
## $ `InPerson Discount` <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ `InPerson Location` <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ `VAT Paid by Buyer` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ SKU                 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…

2.4 TIDY DATA

dat_tidy <- dat_import %>%
    rename_with(tolower) %>% 
    rename_with(~ str_replace_all(
        string = .,
        pattern = " ",
        replacement =  "_")) %>%     
    rename(customer_name = buyer_id) %>% 
    select(
        sale_date, order_id, quantity, price, 
        item_group, item_name, item_maturity, item_variegation, 
        customer_name, #ship_address1, ship_address2, 
        ship_city, ship_state, ship_zipcode, ship_country
    ) %>% 
    mutate(sale_date = lubridate::as_date(sale_date),
           order_id = as.character(order_id)) %>% 
    filter(item_group != "heat pack") %>% 
    mutate_if(is.character, str_to_title) %>% 
    mutate_at(c("quantity", "price"), as.integer)


dat_tidy %>% glimpse()
## Rows: 2,774
## Columns: 13
## $ sale_date        <date> 2021-12-29, 2021-12-20, 2021-12-14, 2021-12-10, 2021…
## $ order_id         <chr> "2327112914", "2318304177", "2309507871", "2301819693…
## $ quantity         <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ price            <int> 55, 69, 135, 110, 6, 549, 69, 69, 99, 89, 6, 49, 20, …
## $ item_group       <chr> "Tarzan", "Tarzan", "Korok", "Tarzan", "Heat Pack", "…
## $ item_name        <chr> "Tornado", "Hyrulian Princess", "Hope Korok", "Hyruli…
## $ item_maturity    <chr> "S", "S", NA, "S", NA, NA, "S", "S", "S", "S", NA, NA…
## $ item_variegation <chr> NA, "High", NA, "High", NA, NA, "High", "High", NA, N…
## $ customer_name    <chr> "Decker", "Deadshadow", "Dawnreaper", "Dayspear", "Da…
## $ ship_city        <chr> "Fullerton", "Austin", "Honolulu", "Cambridge", "Blac…
## $ ship_state       <chr> "Ca", "Tx", "Hi", "Ma", "Wa", "Wa", "Hi", "Ct", "Fl",…
## $ ship_zipcode     <chr> "92831", "78759-7997", "96815", "2139", "98010", "980…
## $ ship_country     <chr> "United States", "United States", "United States", "U…

3 FOUNDATIONAL DATA

Here we produce the data that breathes life into the rest of the execution and experimentation in this paper.

3.1 ORDERLINES

Orderlines contains the individual line items making up the transactions.

dat_orderlines <- dat_tidy %>% 
    arrange(item_name, price, item_maturity, item_variegation, sale_date) %>% 
    select(sale_date, item_group, item_name, 
           quantity, price,
           customer_name, order_id) 

dat_orderlines %>% glimpse()
## Rows: 2,774
## Columns: 7
## $ sale_date     <date> 2021-10-10, 2021-10-10, 2021-10-29, 2021-10-29, 2021-11…
## $ item_group    <chr> "Tarzan", "Tarzan", "Tarzan", "Tarzan", "Tarzan", "Tarza…
## $ item_name     <chr> "Butterfly", "Butterfly", "Butterfly", "Butterfly", "But…
## $ quantity      <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ price         <int> 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, …
## $ customer_name <chr> "Featherdreamer", "Featherdreamer", "Evensteel", "Evenst…
## $ order_id      <chr> "2208961809", "2208961809", "2228859397", "2228859397", …

3.2 ORDERS

Orders is an aggregate view on Orderlines using the Order ID. It effectively shows what was purchased in that order. I’ve even fashioned a “receipt”.

dat_orders <- dat_orderlines %>% 
    group_by(sale_date, customer_name, order_id) %>% 
    mutate(receipt = paste0(item_name)) %>% 
    summarise(
        order_price = sum(price),
        order_quantity = sum(quantity),
        order_receipt = toString(unique(receipt)),
        .groups = "drop") 

dat_orders %>% glimpse()
## Rows: 1,235
## Columns: 6
## $ sale_date      <date> 2021-01-03, 2021-01-13, 2021-01-15, 2021-01-15, 2021-0…
## $ customer_name  <chr> "Youngvigor", "Younger", "Apatow", "York", "Youngblood"…
## $ order_id       <chr> "1911958567", "1914566518", "1916803730", "1916803730",…
## $ order_price    <int> 14, 178, 178, 178, 42, 178, 260, 178, 178, 260, 230, 29…
## $ order_quantity <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 2, 2, 2, 4, 2, 2, 2…
## $ order_receipt  <chr> "Piranha Plant", "Eros Korok", "Eros Korok", "Eros Koro…

3.3 CUSTOMERS

This is where the consumers live and will be the backbone of the RFM analysis.

dat_customers <- dat_orderlines %>% 
    group_by(customer_name) %>% 
    mutate(items = paste0(item_name)) %>% 
    summarise(
        total_spent = sum(price),
        total_items = sum(quantity),
        total_orders = n_distinct(order_id),
        first_purchase = min(sale_date),
        last_purchase = max(sale_date),
        receipt = toString(unique(items))) %>% 
    ungroup() %>% 
    mutate(
        analysis_date = ymd("20220101"),
        tenure =  analysis_date - first_purchase,
        recency = as.integer( last_purchase - analysis_date ) ,
        monetary = total_spent,
        frequency = total_orders,
        total_spent = total_spent %>% scales::dollar(accuracy = 1)
    ) %>% select(customer_name, receipt,
                 recency, frequency, monetary, 
                 contains("total_"), 
                 everything()) 

dat_customers %>% glimpse() 
## Rows: 811
## Columns: 12
## $ customer_name  <chr> "Apatow", "Armas", "Arrington", "Ashbluff", "Ashsorrow"…
## $ receipt        <chr> "Dragonscale Splitleaf, Eros Korok, Gemheart Korok", "E…
## $ recency        <int> -248, -286, -295, -342, -335, -338, -338, -336, -335, -…
## $ frequency      <int> 3, 3, 5, 1, 2, 1, 2, 1, 1, 1, 1, 1, 6, 2, 1, 4, 3, 1, 2…
## $ monetary       <int> 456, 480, 884, 178, 356, 178, 356, 178, 376, 120, 178, …
## $ total_spent    <chr> "$456", "$480", "$884", "$178", "$356", "$178", "$356",…
## $ total_items    <int> 6, 8, 10, 2, 4, 2, 4, 2, 4, 2, 2, 4, 12, 4, 2, 8, 6, 2,…
## $ total_orders   <int> 3, 3, 5, 1, 2, 1, 2, 1, 1, 1, 1, 1, 6, 2, 1, 4, 3, 1, 2…
## $ first_purchase <date> 2021-01-15, 2021-01-20, 2021-01-23, 2021-01-24, 2021-0…
## $ last_purchase  <date> 2021-04-28, 2021-03-21, 2021-03-12, 2021-01-24, 2021-0…
## $ analysis_date  <date> 2022-01-01, 2022-01-01, 2022-01-01, 2022-01-01, 2022-0…
## $ tenure         <drtn> 351 days, 346 days, 343 days, 342 days, 341 days, 338 …

4 RECENT PERFORMANCE

4.1 PAST 7, 28, 120, & 365

Diving into the recent performance of Bloom Daddy. We use 7, 28, 120, and 365 day intervals as they reduce seasonality seen in other models. To elaborate, many people use 30 days, but this can add volatility to your numbers as you’re accounting for 4 weeks and 2 days vs. a clean 4 weeks. If you experience varied productivity throughout the week, the 30 days can hurt you depending on the seasonality.

dat_past_performance <- dat_orderlines %>% 
    mutate(
        analysis_date = ymd("20220101"),
        ddays = sale_date - analysis_date) %>% 
    mutate(
        ddays = ddays %>% as.integer(),
        flag_past7 = case_when(ddays >= -7 ~ 1, T ~ 0),
        flag_past28 = case_when(ddays >= -28 ~ 1, T ~ 0),
        flag_past120 = case_when(ddays >= -120 ~ 1, T ~ 0),
        flag_past365 = case_when(ddays >= -365 ~ 1, T ~ 0)
        ) %>% 
        select(sale_date, item_name, item_group, price, contains("past"))

dat_pp7 <- dat_past_performance %>% 
    filter(flag_past7 == 1) %>% 
    summarise(total_spend = sum(price)) %>% 
    mutate(phase = "past7")

dat_pp28 <- dat_past_performance %>% 
    filter(flag_past28 == 1) %>% 
    summarise(total_spend = sum(price)) %>% 
    mutate(phase = "past28")

dat_pp120 <- dat_past_performance %>% 
    filter(flag_past120 == 1) %>% 
    summarise(total_spend = sum(price)) %>% 
    mutate(phase = "past120")

dat_pp365 <- dat_past_performance %>% 
    filter(flag_past365 == 1) %>% 
    summarise(total_spend = sum(price)) %>% 
    mutate(phase = "past365")

dat_pp7 %>% rbind(dat_pp28) %>% rbind(dat_pp120) %>% rbind(dat_pp365) %>% 
    pivot_wider(names_from = phase, values_from = total_spend) %>% 
    mutate_all(scales::dollar) %>% 
    tablekable()
past7 past28 past120 past365
$1,796 $7,896 $33,384 $229,536

4.2 WEEKLY TIMELINE (WoW)

4.2.1 VIZ DATA

viz_timelineW <- dat_orderlines %>% 
    mutate(sale_week = FLOOR_WEEK(sale_date)) %>% 
    group_by(sale_week) %>% 
    summarise(total_spend = sum(price)) %>% 
    ggplot(aes(x = sale_week, 
        y = total_spend, 
        fill = total_spend,
        text = str_glue("Date: {sale_week}
                         Revenue: {scales::dollar(total_spend)}"))) +
    geom_smooth() +
    geom_col() +
    theme_ipsum() +
   # ylim(1, 365) +
    labs(
        title = "Sales over Time",
        subtitle = "",
        caption = "",
        x = "Week",
        y = "Total Spend",
        fill = "Total Spend"#,
       # color = "Persona"
       ) +
    viridis::scale_fill_viridis(direction = 1, discrete = FALSE) +
    theme(
      legend.position = "bottom",
#     axis.text = element_blank(),
#     axis.text.x = element_text(angle = 90),
#     axis.title = element_blank(),
#      panel.grid = element_blank(),
     plot.margin = margin(.5, .5, .5, .5, "cm")
) 

4.2.2 INTERACTIVE

plotly::ggplotly(viz_timelineW, tooltip = "text")

4.2.3 STATIC

viz_timelineW

4.3 MONTHLY TIMELINE (MoM)

4.3.1 VIZ DATA

viz_timelineM <- dat_orderlines %>% 
    mutate(sale_month = FLOOR_MONTH(sale_date)) %>% 
    group_by(sale_month) %>% 
    summarise(total_spend = sum(price)) %>% 
    ggplot(aes(x = sale_month, 
        y = total_spend, 
        fill = total_spend,
        text = str_glue("Date: {sale_month}
                         Revenue: {scales::dollar(total_spend)}"))) +
    geom_smooth() +
    geom_col() +
    theme_ipsum() +
   # ylim(1, 365) +
    labs(
        title = "Sales over Time",
        subtitle = "",
        caption = "",
        x = "Month",
        y = "Total Spend",
        fill = "Total Spend"#,
       # color = "Persona"
       ) +
    viridis::scale_fill_viridis(direction = 1, discrete = FALSE) +
    theme(
      legend.position = "bottom",
#     axis.text = element_blank(),
#     axis.text.x = element_text(angle = 90),
#     axis.title = element_blank(),
#      panel.grid = element_blank(),
     plot.margin = margin(.5, .5, .5, .5, "cm")
) 

4.3.2 INTERACTIVE

plotly::ggplotly(viz_timelineM, tooltip = "text")

4.3.3 STATIC

viz_timelineM

5 RFM ANALYSIS

An RFM Analysis is built on three components:

  1. Recency - the days since a consumer last purchased.
  2. Frequency - the total number of orders purchased.
  3. Monetary - the total revenue generated by the consumer.

By using these facets, we can segment our data base and prescribe strategies tailored to the user’s behavior. For instance, a user with high monetary and frequency values, but low recency can be targeted with marketing to intervene and prevent them from lapsing.

5.1 CALCULATING RFM (n=3)

dat_rfm3 <- dat_customers %>% 
    cbind(dat_orderlines %>% 
    group_by(customer_name) %>% 
    mutate(items = paste0(item_name)) %>% 
    summarise(
        total_spent = sum(price),
        total_items = sum(quantity),
        total_orders = n_distinct(order_id),
        first_purchase = min(sale_date),
        last_purchase = max(sale_date),
        receipt = toString(unique(items))) %>% 
    ungroup() %>% 
    mutate(
        analysis_date = ymd("20220101"),
        tenure =  analysis_date - first_purchase,
        recency = last_purchase - analysis_date,
        monetary = total_spent,
        frequency = total_orders,
        total_spent = total_spent %>% scales::dollar(accuracy = 1)
    ) %>% select(customer_name, contains("total_"), receipt, 
                 recency, frequency, monetary, tenure, 
                 everything()) %>% 
        summarise(r = ntile(x = recency, n = 3),
                  f = ntile(x = frequency, n = 3),
                  m = ntile(x = monetary, n = 3))) %>% 
    mutate(rfm_score = paste(r,f,m),
           rfm_sum = as.integer(r)+as.integer(f)+as.integer(m)) %>% 
    select(customer_name, contains("rfm_"),
           r,f,m,recency, frequency, monetary,
           total_spent, total_orders, total_items, 
           last_purchase, tenure, everything()
               ) %>% 
    arrange(desc(monetary), recency, desc(frequency))

dat_rfm3 %>% glimpse()
## Rows: 811
## Columns: 17
## $ customer_name  <chr> "Ronchegnac", "Shieldtrap", "Plaingrove", "Vernize", "V…
## $ rfm_score      <chr> "3 3 3", "1 3 3", "2 3 3", "1 3 3", "1 3 3", "1 3 3", "…
## $ rfm_sum        <int> 9, 7, 8, 7, 7, 7, 9, 5, 8, 8, 5, 6, 7, 9, 8, 5, 7, 7, 8…
## $ r              <int> 3, 1, 2, 1, 1, 1, 3, 1, 2, 2, 1, 1, 1, 3, 2, 1, 1, 1, 2…
## $ f              <int> 3, 3, 3, 3, 3, 3, 3, 1, 3, 3, 1, 2, 3, 3, 3, 1, 3, 3, 3…
## $ m              <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3…
## $ recency        <int> -128, -233, -179, -230, -305, -285, -163, -285, -216, -…
## $ frequency      <int> 5, 2, 3, 5, 2, 2, 3, 1, 2, 7, 1, 1, 2, 2, 2, 1, 2, 2, 6…
## $ monetary       <int> 2506, 1896, 1804, 1748, 1538, 1526, 1486, 1416, 1378, 1…
## $ total_spent    <chr> "$2,506", "$1,896", "$1,804", "$1,748", "$1,538", "$1,5…
## $ total_orders   <int> 5, 2, 3, 5, 2, 2, 3, 1, 2, 7, 1, 1, 2, 2, 2, 1, 2, 2, 6…
## $ total_items    <int> 14, 10, 12, 12, 4, 10, 6, 8, 8, 14, 2, 2, 4, 4, 4, 2, 6…
## $ last_purchase  <date> 2021-08-26, 2021-05-13, 2021-07-06, 2021-05-16, 2021-0…
## $ tenure         <drtn> 212 days, 234 days, 207 days, 315 days, 309 days, 285 …
## $ receipt        <chr> "Dragonscale Splitleaf, Hyrulian Princess, Marbled Spli…
## $ first_purchase <date> 2021-06-03, 2021-05-12, 2021-06-08, 2021-02-20, 2021-0…
## $ analysis_date  <date> 2022-01-01, 2022-01-01, 2022-01-01, 2022-01-01, 2022-0…

5.2 DEFINING PERSONAS

Personas use Recency, Frequency, and Monetary in a 3D-fashion to help articulate tactics for the different consumers present. Based on our definition, we have six options total: Champions, Loyal, Recent, High Potential, Needs Nurturing, and Inactive. Unfortunately, we were yet to produce Champions as they need consistently high marks across all three facets.

5.2.1 PERSONAS OVERVIEW

dat_rfm_persona1 <- dat_rfm3 %>% 
    mutate(fm = f+m) %>% 
    mutate(
        rfm_persona = case_when(
            r == 3 & between(fm,8,10) ~ "Champions",
            r == 3 & between(fm,3,7) ~ "Loyal",
            r == 3 & between(fm,2,2) ~ "Recent",
            r == 2 & between(fm,6,10) ~ "High Potential",
            r == 2 & between(fm,2,5) ~ "Needs Nurturing",
            r == 1 & between(fm,2,10) ~ "Inactive",
            T ~ "")) %>% 
    select(customer_name, contains("rfm_p"), contains("rfm_"), everything())

dat_rfm_persona1$rfm_persona <- factor(dat_rfm_persona1$rfm_persona, 
         levels = c("Champions", "Loyal", "Recent", 
                    "High Potential", "Needs Nurturing", 
                    "Inactive")) 

dat_rfm_persona1 %>% 
    group_by(rfm_persona) %>% 
    summarise(n = n(),
              min_r = min(recency),
              max_r = max(recency),
              min_f = min(frequency),
              max_f = max(frequency),
              min_m = min(monetary),
              max_m = max(monetary)
              )  %>% 
    mutate(perc_total = n/sum(n)) %>% 
    ungroup() %>% 
    mutate(perc_total = scales::percent(perc_total)) %>% 
    select(rfm_persona, n, perc_total, everything()) %>% 
    tablekable()
rfm_persona n perc_total min_r max_r min_f max_f min_m max_m
Loyal 166 20.5% -173 -2 1 6 56 2506
Recent 104 12.8% -167 -1 1 1 28 138
High Potential 68 8.4% -220 -176 2 7 276 1804
Needs Nurturing 202 24.9% -222 -173 1 3 36 1060
Inactive 271 33.4% -363 -222 1 5 14 1896

5.2.2 RFM RANGES

5.2.2.1 PERSONAS DEEP DIVE

dat_rfm_persona1 %>% 
    group_by(rfm_persona,rfm_score,r,f,m) %>% 
    summarise(n = n(),
              min_r = min(recency),
              max_r = max(recency),
              min_f = min(frequency),
              max_f = max(frequency),
              min_m = min(monetary),
              max_m = max(monetary)
              ) %>% 
    mutate(perc_total = n/sum(n)) %>% 
    ungroup() %>% 
    mutate(perc_total = scales::percent(perc_total)) %>% 
    select(rfm_persona, rfm_score, r,f,m,n, perc_total, everything()) %>% 
    tablekable()
## `summarise()` has grouped output by 'rfm_persona', 'rfm_score', 'r', 'f'. You
## can override using the `.groups` argument.
rfm_persona rfm_score r f m n perc_total min_r max_r min_f max_f min_m max_m
Loyal 3 1 2 3 1 2 37 100% -167 -2 1 1 150 270
Loyal 3 1 3 3 1 3 23 100% -166 -13 1 1 278 1110
Loyal 3 2 1 3 2 1 5 100% -155 -36 2 2 68 118
Loyal 3 2 2 3 2 2 14 100% -128 -2 2 2 166 276
Loyal 3 2 3 3 2 3 17 100% -161 -3 2 2 278 908
Loyal 3 3 1 3 3 1 15 100% -149 -97 2 2 56 118
Loyal 3 3 2 3 3 2 24 100% -170 -43 2 3 146 276
Loyal 3 3 3 3 3 3 31 100% -173 -32 2 6 296 2506
Recent 3 1 1 3 1 1 104 100% -167 -1 1 1 28 138
High Potential 2 3 3 2 3 3 68 100% -220 -176 2 7 276 1804
Needs Nurturing 2 1 1 2 1 1 36 100% -218 -174 1 1 60 138
Needs Nurturing 2 1 2 2 1 2 9 100% -220 -185 1 1 178 276
Needs Nurturing 2 1 3 2 1 3 13 100% -216 -174 1 1 290 1060
Needs Nurturing 2 2 1 2 2 1 38 100% -222 -187 1 1 36 138
Needs Nurturing 2 2 2 2 2 2 54 100% -220 -179 1 2 138 256
Needs Nurturing 2 2 3 2 2 3 10 100% -217 -173 1 2 290 970
Needs Nurturing 2 3 1 2 3 1 6 100% -212 -180 2 2 96 120
Needs Nurturing 2 3 2 2 3 2 36 100% -217 -174 2 3 166 276
Inactive 1 1 1 1 1 1 10 100% -331 -236 1 1 22 120
Inactive 1 1 2 1 1 2 20 100% -342 -240 1 1 178 266
Inactive 1 1 3 1 1 3 19 100% -335 -223 1 1 310 1416
Inactive 1 2 1 1 2 1 52 100% -363 -222 1 1 14 122
Inactive 1 2 2 1 2 2 57 100% -353 -222 1 1 138 270
Inactive 1 2 3 1 2 3 23 100% -340 -229 1 2 278 1298
Inactive 1 3 1 1 3 1 5 100% -291 -249 2 2 56 118
Inactive 1 3 2 1 3 2 19 100% -340 -224 2 3 160 260
Inactive 1 3 3 1 3 3 66 100% -342 -223 2 5 278 1896

5.2.2.2 RECENCY RANGES

dat_rfm_persona1 %>% 
    group_by(rfm_persona,r) %>% 
    summarise(n = n(),
              min_r = min(recency),
              max_r = max(recency),
              min_f = min(frequency),
              max_f = max(frequency),
              min_m = min(monetary),
              max_m = max(monetary)
              ) %>% 
    mutate(perc_total = n/sum(n)) %>% 
    ungroup() %>% 
    mutate(perc_total = scales::percent(perc_total)) %>% 
    select(rfm_persona, r,n, perc_total, everything()) %>% 
    tablekable()
## `summarise()` has grouped output by 'rfm_persona'. You can override using the
## `.groups` argument.
rfm_persona r n perc_total min_r max_r min_f max_f min_m max_m
Loyal 3 166 100% -173 -2 1 6 56 2506
Recent 3 104 100% -167 -1 1 1 28 138
High Potential 2 68 100% -220 -176 2 7 276 1804
Needs Nurturing 2 202 100% -222 -173 1 3 36 1060
Inactive 1 271 100% -363 -222 1 5 14 1896

5.2.2.3 FREQUENCY RANGES

dat_rfm_persona1 %>% 
    group_by(rfm_persona,f) %>% 
    summarise(n = n(),
              min_r = min(recency),
              max_r = max(recency),
              min_f = min(frequency),
              max_f = max(frequency),
              min_m = min(monetary),
              max_m = max(monetary)
              ) %>% 
    mutate(perc_total = n/sum(n)) %>% 
    ungroup() %>% 
    mutate(perc_total = scales::percent(perc_total)) %>% 
    select(rfm_persona,f, n, perc_total, everything()) %>% 
    tablekable()
## `summarise()` has grouped output by 'rfm_persona'. You can override using the
## `.groups` argument.
rfm_persona f n perc_total min_r max_r min_f max_f min_m max_m
Loyal 1 60 36.14% -167 -2 1 1 150 1110
Loyal 2 36 21.69% -161 -2 2 2 68 908
Loyal 3 70 42.17% -173 -32 2 6 56 2506
Recent 1 104 100.00% -167 -1 1 1 28 138
High Potential 3 68 100.00% -220 -176 2 7 276 1804
Needs Nurturing 1 58 28.71% -220 -174 1 1 60 1060
Needs Nurturing 2 102 50.50% -222 -173 1 2 36 970
Needs Nurturing 3 42 20.79% -217 -174 2 3 96 276
Inactive 1 49 18.08% -342 -223 1 1 22 1416
Inactive 2 132 48.71% -363 -222 1 2 14 1298
Inactive 3 90 33.21% -342 -223 2 5 56 1896

5.2.2.4 MONETARY RANGES

dat_rfm_persona1 %>% 
    group_by(rfm_persona,m) %>% 
    summarise(n = n(),
              min_r = min(recency),
              max_r = max(recency),
              min_f = min(frequency),
              max_f = max(frequency),
              min_m = min(monetary),
              max_m = max(monetary)
              ) %>% 
    mutate(perc_total = n/sum(n)) %>% 
    ungroup() %>% 
    mutate(perc_total = scales::percent(perc_total)) %>% 
    select(rfm_persona, m, n, perc_total, everything()) %>% 
    tablekable()
## `summarise()` has grouped output by 'rfm_persona'. You can override using the
## `.groups` argument.
rfm_persona m n perc_total min_r max_r min_f max_f min_m max_m
Loyal 1 20 12.05% -155 -36 2 2 56 118
Loyal 2 75 45.18% -170 -2 1 3 146 276
Loyal 3 71 42.77% -173 -3 1 6 278 2506
Recent 1 104 100.00% -167 -1 1 1 28 138
High Potential 3 68 100.00% -220 -176 2 7 276 1804
Needs Nurturing 1 80 39.60% -222 -174 1 2 36 138
Needs Nurturing 2 99 49.01% -220 -174 1 3 138 276
Needs Nurturing 3 23 11.39% -217 -173 1 2 290 1060
Inactive 1 67 24.72% -363 -222 1 2 14 122
Inactive 2 96 35.42% -353 -222 1 3 138 270
Inactive 3 108 39.85% -342 -223 1 5 278 1896

5.3 PERSONA VISUALIZATION

5.3.1 RECENCY

viz_dat_r <- dat_rfm_persona1 %>% 
    mutate(recency = -1*as.integer(recency))

viz_r <- viz_dat_r %>% 
    ggplot(aes(x = fct_reorder(rfm_persona, total_spent), 
               y = recency, 
               fill = fct_reorder(rfm_persona, total_spent)
               # text = str_glue(
               # "Monetary :: {rfm_persona}
               #  {total_spent} ({m})")
               )) +
       ggdist::stat_halfeye(aes(fct_reorder(rfm_persona, total_spent)),
        ## custom bandwidth
        adjust = 0.5,
        ## move geom to the right
        justification = -.2,
        ## remove slab interval
        .width = 0,
        point_colour = NA,
                na.rm = T) +
    geom_boxplot(aes(color = fct_reorder(rfm_persona, total_spent)),
        width = .4,
        ## remove outliers
        outlier.color = NA,
        outlier.alpha =  0.33,
        alpha = 0.66,
                na.rm = T) +
    # Add dot plots from {ggdist} package
    ggdist::stat_dots(aes(color = fct_reorder(rfm_persona, total_spent)), 
    # geom_jitter(aes(color = fct_reorder(rfm_persona, total_spent)), 
    #             alpha = 0.33,
    #             width = 0.33,
    #             height = 0.33,
    #             na.rm = T) +
        # ## orientation to the left
         side = "left",
        # ## move geom to the left
         justification = 1.1,
        # ## adjust grouping (binning) of observations
        binwidth = .25) +
    #scale_fill_viridis_d(direction = -1) +
    theme_ipsum() +
    ylim(1, 365) +
    coord_flip()  +
    labs(
        title = "Persona Raincloud: Recency",
        subtitle = "Days Since Last Purchase vs Persona",
        caption = "",
        x = "Persona",
        y = "Days Since Purchase",
        fill = "Persona",
        color = "Persona") +
    viridis::scale_fill_viridis(direction = -1, discrete = TRUE) +
    theme(
      legend.position = "bottom",
#     axis.text = element_blank(),
#     axis.text.x = element_text(angle = 90),
#     axis.title = element_blank(),
#      panel.grid = element_blank(),
     plot.margin = margin(.5, .5, .5, .5, "cm")
) 

viz_r

5.3.2 FREQUENCY

viz_dat_f <- dat_rfm_persona1

viz_f <- viz_dat_f %>% 
    ggplot(aes(x = fct_reorder(rfm_persona, total_spent), 
               y = frequency, 
               fill = fct_reorder(rfm_persona, total_spent)
               # text = str_glue(
               # "Monetary :: {rfm_persona}
               #  {total_spent} ({m})")
               )) +
       ggdist::stat_halfeye(aes(fct_reorder(rfm_persona, total_spent)),
        ## custom bandwidth
        adjust = 0.5,
        ## move geom to the right
        justification = -.2,
        ## remove slab interval
        .width = 0,
        point_colour = NA,
                na.rm = T) +
    geom_boxplot(aes(color = fct_reorder(rfm_persona, total_spent)),
        width = .4,
        ## remove outliers
        outlier.color = NA,
        outlier.alpha =  0.33,
        alpha = 0.66,
                na.rm = T) +
    # Add dot plots from {ggdist} package
    theme_ipsum() +
    ylim(1, 4) +
    coord_flip()  +
    labs(
        title = "Persona Raincloud: Frequency",
        subtitle = "Total Orders vs Persona",
        caption = "",
        x = "Persona",
        y = "Total Orders",
        fill = "Persona",
        color = "Persona") +
    viridis::scale_fill_viridis(direction = -1, discrete = TRUE) +
    theme(
      legend.position = "bottom",
#     axis.text = element_blank(),
#     axis.text.x = element_text(angle = 90),
#     axis.title = element_blank(),
#      panel.grid = element_blank(),
     plot.margin = margin(.5, .5, .5, .5, "cm")
) 

viz_f

5.3.3 MONETARY

viz_dat_m <- dat_rfm_persona1

viz_m <- viz_dat_m %>% 
    ggplot(aes(x = fct_reorder(rfm_persona, total_spent), 
               y = monetary, 
               fill = fct_reorder(rfm_persona, total_spent)
               # text = str_glue(
               # "Monetary :: {rfm_persona}
               #  {total_spent} ({m})")
               )) +
       ggdist::stat_halfeye(aes(fct_reorder(rfm_persona, total_spent)),
        ## custom bandwidth
        adjust = 0.5,
        ## move geom to the right
        justification = -.2,
        ## remove slab interval
        .width = 0,
        point_colour = NA,
                na.rm = T) +
    geom_boxplot(aes(color = fct_reorder(rfm_persona, total_spent)),
        width = .4,
        ## remove outliers
        outlier.color = NA,
        outlier.alpha =  0.33,
        alpha = 0.66,
                na.rm = T) +
    # Add dot plots from {ggdist} package
    ggdist::stat_dots(aes(color = fct_reorder(rfm_persona, total_spent)), 
    # geom_jitter(aes(color = fct_reorder(rfm_persona, total_spent)), 
    #             alpha = 0.33,
    #             width = 0.33,
    #             height = 0.33,
    #             na.rm = T) +
        # ## orientation to the left
         side = "left",
        # ## move geom to the left
         justification = 1.1,
        # ## adjust grouping (binning) of observations
        binwidth = .25) +
        # jitter = TRUE
        #inherit.aes = TRUE
    #scale_fill_viridis_d(direction = -1) +
    theme_ipsum() +
    ylim(0, 600) +
    coord_flip()  +
    labs(
        title = "Persona Raincloud: Monetary",
        subtitle = "Total Spent vs Persona",
        caption = "",
        x = "Persona",
        y = "Total Spent",
        fill = "Persona",
        color = "Persona") +
    viridis::scale_fill_viridis(direction = -1, discrete = TRUE) +
    theme(
      legend.position = "bottom",
#     axis.text = element_blank(),
#     axis.text.x = element_text(angle = 90),
#     axis.title = element_blank(),
#      panel.grid = element_blank(),
     plot.margin = margin(.5, .5, .5, .5, "cm")
) 

viz_m

6 TOP ITEMS

6.1 VIZ DATA

stat_items <- dat_orderlines %>% 
    group_by(item_group, item_name) %>% 
    summarise(n_items = n(),
              n_orders = n_distinct(order_id),
              sum_price = sum(price)
              ) %>% ungroup() %>% 
    mutate(
        revenue = sum_price %>% scales::dollar(accuracy = 1,big.mark = ","),
        perc_total = sum_price/sum(sum_price),
        percent_total = perc_total %>% scales::percent(accuracy = 0.01),
        rankv = -perc_total
    ) %>% select(item_group, item_name, 
                 n_items, n_orders, revenue, percent_total, 
                 everything()) %>% 
    arrange(desc(sum_price),n_orders, n_items) %>% 
    mutate(rank = rank(rankv))
## `summarise()` has grouped output by 'item_group'. You can override using the
## `.groups` argument.
viz_items <- stat_items %>% 
    filter(rank <= 10) %>% 
    ggplot(aes(x = item_name %>% fct_reorder(sum_price), 
               y = sum_price, 
               fill = sum_price,
               text = str_glue(" 
                               Name: {item_name}
                               Revenue: {revenue}
                               % Revenue: {percent_total}
                               Items Sold: {n_items}
                               Total Orders: {n_orders}"))) +
    geom_col()+
    coord_flip() +
    theme_ipsum() +
    labs(
        title = "Top Performing Items",
        subtitle = "Total Spend vs Items",
        caption = "",
        x = "Items",
        y = "Total Spend",
        fill = "Total Spend") +
    theme(
      legend.position = "bottom",
#     axis.text = element_blank(),
#     axis.text.x = element_text(angle = 90),
#     axis.title = element_blank(),
#      panel.grid = element_blank(),
     plot.margin = margin(.5, .5, .5, .5, "cm")
) 

6.2 INTERACTIVE

viz_items %>% plotly::ggplotly(tooltip = "text")

6.3 STATIC

viz_items

7 TOP GROUPS

7.1 VIZ DATA

stat_groups <- dat_orderlines %>% 
    group_by(item_group) %>% 
    mutate(receipt = paste0(item_name)) %>% 
    summarise(items_sold = toString(unique(receipt)),
              n_items = n(),
              n_orders = n_distinct(order_id),
              sum_price = sum(price)
              ) %>% ungroup() %>% 
    mutate(
        revenue = sum_price %>% scales::dollar(accuracy = 1),
        perc_total = sum_price/sum(sum_price),
        percent_total = perc_total %>% scales::percent(accuracy = 0.01),
        rankv = -perc_total
    ) %>% select(item_group, items_sold, 
                 n_items, n_orders, revenue, percent_total,
                 everything()) %>% 
    arrange(desc(sum_price),n_orders, item_group) %>% 
    mutate(rank = rank(rankv))

viz_groups <- stat_groups %>% 
    filter(rank <= 10) %>% 
    ggplot(aes(x = item_group %>% fct_reorder(sum_price), 
               y = sum_price, 
               fill = sum_price,
               text = str_glue(" 
                               Group: {item_group}
                               Revenue: {revenue}
                               % Revenue: {percent_total}
                               Items Sold: {n_items}
                               Total Orders: {n_orders}
                               Items Sold: {items_sold}"
               ))) +
    geom_col()+
    coord_flip() +
    theme_ipsum()  +
    labs(
        title = "Top Performing Item Groups",
        subtitle = "Total Spend vs Item Groups",
        caption = "",
        x = "Item Group",
        y = "Total Spend",
        fill = "Total Spend") +
    theme(
      legend.position = "bottom",
#     axis.text = element_blank(),
#     axis.text.x = element_text(angle = 90),
#     axis.title = element_blank(),
#      panel.grid = element_blank(),
     plot.margin = margin(.5, .5, .5, .5, "cm")
) 

7.2 INTERACTIVE

viz_groups %>% plotly::ggplotly(tooltip = "text")

7.3 STATIC

viz_groups

8 TOP CUSTOMERS

8.1 VIZ DATA

stat_customers <- dat_customers %>% 
    mutate(rankv = -monetary,
           perc_total = monetary/sum(monetary)) %>% 
    arrange(desc(monetary), desc(frequency), desc(recency)) %>% 
    mutate(rank = rank(rankv),
           percent_total = scales::percent(perc_total))

viz_customers_rank <- stat_customers %>%
    filter(rank <= 15) %>% 
    ggplot(aes(x = customer_name %>% fct_reorder(monetary), 
               y = monetary, 
               fill = monetary,
               text = str_glue(
               "
               {customer_name} is ranked no.{as.integer(rank)} with:
               Revenue: {total_spent}
               % Revenue: {percent_total}
               Total Orders: {total_orders}
               Total Items: {total_items}
               Receipt: {receipt}")
              )) +
    geom_col() +
    coord_flip() +
    theme_ipsum() + 
    labs(
        title = "Top Customers (n=15)",
        subtitle = "Total Spend vs Customer",
        caption = "",
        x = "Persona",
        y = "Total Spend",
        fill = "Total Spend") +
    theme(
      legend.position = "bottom",
#     axis.text = element_blank(),
#     axis.text.x = element_text(angle = 90),
#     axis.title = element_blank(),
#      panel.grid = element_blank(),
     plot.margin = margin(.5, .5, .5, .5, "cm")
) 

8.2 INTERACTIVE

viz_customers_rank %>% plotly::ggplotly(tooltip = "text")

8.3 STATIC

viz_customers_rank

8.4 POLAR for fun

viz_customers_polar <- stat_customers %>%
    filter(rank <= 360) %>% 
    ggplot(aes(x = customer_name %>% fct_reorder(monetary), 
               y = monetary, 
               fill = monetary,
               text = str_glue(
               "
               {customer_name} is ranked {as.integer(rank)} with:
               Revenue: {total_spent}
               % Revenue: {percent_total}
               Total Orders: {total_orders}
               Total Items: {total_items}
               Receipt: {receipt}")
              )) +
    geom_col() +
    coord_polar() + 
    theme_ipsum()  +
    labs(
        title = "Top Customers in the Arctic (n=50)",
        subtitle = "Total Spend vs Customer",
        caption = "",
        x = "Persona",
        y = "Total Spend",
        fill = "Total Spend") +   
    theme(
      legend.position = "bottom",
#     axis.text = element_blank(),
#     axis.text.x = element_text(angle = 90),
#     axis.title = element_blank(),
#      panel.grid = element_blank(),
     plot.margin = margin(.5, .5, .5, .5, "cm")
) 

viz_customers_polar

9 APPENDIX

9.1 OUTBOX

writexl::write_xlsx(x = list(orderlines = dat_orderlines,
                             orders = dat_orders, 
                             customers = dat_customers),
                    path = "outbox/Bloom Daddy's Druid Data.xlsx")